library(Seurat)
library(tidyverse) # dplyr and ggplot2

# List files
files <- list.files(path = "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data", recursive = T, pattern = "*.h5")

files
## [1] "D10_filtered_feature_bc_matrix.h5" "D16_filtered_feature_bc_matrix.h5"
## [3] "D20_filtered_feature_bc_matrix.h5" "D26_filtered_feature_bc_matrix.h5"
## [5] "W10_filtered_feature_bc_matrix.h5" "W16_filtered_feature_bc_matrix.h5"
## [7] "W20_filtered_feature_bc_matrix.h5" "W26_filtered_feature_bc_matrix.h5"
# Create a list of count matrices


h5_read <- lapply(paste0(
  "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data/",
  files
), Read10X_h5)

# Automated way to assign names; modify for your purposes
# names(h5_read)<- sapply(files,
#                        function(x){str_split_1(x,"_")[1]},
#                        USE.NAMES = FALSE)

# Assign names manually
names(h5_read) <- c("D10", "D16", "D20", "D26", "W10", "W16", "W20", "W26")
#########################################################################
# Single sample
##   W10 <- CreateSeuratObject(counts=W10, project="W10", min.cells = 3, min.features = 200)
# View W10
## W10
#########################################################################

# All samples
adp <- mapply(CreateSeuratObject,
  counts = h5_read,
  project = names(h5_read),
  MoreArgs = list(min.cells = 3, min.features = 200)
)
# View adp
adp
## $D10
## An object of class Seurat 
## 19084 features across 7623 samples within 1 assay 
## Active assay: RNA (19084 features, 0 variable features)
##  1 layer present: counts
## 
## $D16
## An object of class Seurat 
## 16943 features across 5990 samples within 1 assay 
## Active assay: RNA (16943 features, 0 variable features)
##  1 layer present: counts
## 
## $D20
## An object of class Seurat 
## 18242 features across 5093 samples within 1 assay 
## Active assay: RNA (18242 features, 0 variable features)
##  1 layer present: counts
## 
## $D26
## An object of class Seurat 
## 17751 features across 7436 samples within 1 assay 
## Active assay: RNA (17751 features, 0 variable features)
##  1 layer present: counts
## 
## $W10
## An object of class Seurat 
## 19059 features across 7382 samples within 1 assay 
## Active assay: RNA (19059 features, 0 variable features)
##  1 layer present: counts
## 
## $W16
## An object of class Seurat 
## 17024 features across 5465 samples within 1 assay 
## Active assay: RNA (17024 features, 0 variable features)
##  1 layer present: counts
## 
## $W20
## An object of class Seurat 
## 18354 features across 5427 samples within 1 assay 
## Active assay: RNA (18354 features, 0 variable features)
##  1 layer present: counts
## 
## $W26
## An object of class Seurat 
## 17373 features across 6013 samples within 1 assay 
## Active assay: RNA (17373 features, 0 variable features)
##  1 layer present: counts
# remove the original sparse matrices
rm(h5_read)
adp <- merge(adp[[1]],
  y = adp[2:length(adp)],
  add.cell.ids = names(adp), project = "Adipose"
)

#############################################################################################################
# If you want to apply QC metrics independently for each sample, you can use this for
# loop to create an individual object for each sample.
## for (file in files){
##  seurat_data <- Read10X_h5(paste0("C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data/",
##                                   file))
##  seurat_obj <- CreateSeuratObject(counts = seurat_data,
##                                   min.features = 200,
##                                   min.cells = 3,
##                                   project = file)
##  assign(file, seurat_obj)
## }


# You can see the primary slots using:
## glimpse(W10_filtered_feature_bc_matrix.h5)
# to go back to the names layer
## Layers(W10_filtered_feature_bc_matrix.h5)

## W10_filtered_feature_bc_matrix.h5[["RNA"]]$counts |> head()
# or
## W10_filtered_feature_bc_matrix.h5@assays$RNA$counts  |> head()
# or
## LayerData(W10_filtered_feature_bc_matrix.h5, assay="RNA", layer='counts') |> head()


# The metadata in the Seurat object is located in adp@metadata and contains the
# information associated with each cell.
################################################################################



head(adp@meta.data) # using head to return only the first 6 rows
##                        orig.ident nCount_RNA nFeature_RNA
## D10_AAACCCAAGATGCTTC-1        D10      11188         3383
## D10_AAACCCAAGCTCTTCC-1        D10      32832         5823
## D10_AAACCCAAGTCATCGT-1        D10      28515         5002
## D10_AAACCCAGTAGCGTCC-1        D10      18954         4305
## D10_AAACCCAGTGCACGCT-1        D10      27181         4586
## D10_AAACCCAGTGTAAATG-1        D10       3090         1030
# Access a single column.
head(adp$orig.ident)
## D10_AAACCCAAGATGCTTC-1 D10_AAACCCAAGCTCTTCC-1 D10_AAACCCAAGTCATCGT-1 
##                  "D10"                  "D10"                  "D10" 
## D10_AAACCCAGTAGCGTCC-1 D10_AAACCCAGTGCACGCT-1 D10_AAACCCAGTGTAAATG-1 
##                  "D10"                  "D10"                  "D10"
# or
head(adp[["orig.ident"]])
##                        orig.ident
## D10_AAACCCAAGATGCTTC-1        D10
## D10_AAACCCAAGCTCTTCC-1        D10
## D10_AAACCCAAGTCATCGT-1        D10
## D10_AAACCCAGTAGCGTCC-1        D10
## D10_AAACCCAGTGCACGCT-1        D10
## D10_AAACCCAGTGTAAATG-1        D10
# acess multiple columns
# Access multiple columns, rows.
head(adp[[c("orig.ident", "nCount_RNA")]])[1:3, ]
##                        orig.ident nCount_RNA
## D10_AAACCCAAGATGCTTC-1        D10      11188
## D10_AAACCCAAGCTCTTCC-1        D10      32832
## D10_AAACCCAAGTCATCGT-1        D10      28515
# to save the Seurat object

saveRDS(adp, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merged.rds")
library(Seurat) # Seurat toolkit
library(hdf5r) # for data import
library(patchwork) # for plotting
library(glmGamPoi) # for sctransform


# read the object
## adp <- readRDS("C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merged.rds")


glimpse(adp)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
##   ..@ assays      :List of 1
##   .. ..$ RNA:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
##   ..@ meta.data   :'data.frame': 50429 obs. of  3 variables:
##   .. ..$ orig.ident  : chr [1:50429] "D10" "D10" "D10" "D10" ...
##   .. ..$ nCount_RNA  : num [1:50429] 11188 32832 28515 18954 27181 ...
##   .. ..$ nFeature_RNA: int [1:50429] 3383 5823 5002 4305 4586 1030 5015 3927 4006 5489 ...
##   ..@ active.assay: chr "RNA"
##   ..@ active.ident: Factor w/ 8 levels "D10","D16","D20",..: 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "names")= chr [1:50429] "D10_AAACCCAAGATGCTTC-1" "D10_AAACCCAAGCTCTTCC-1" "D10_AAACCCAAGTCATCGT-1" "D10_AAACCCAGTAGCGTCC-1" ...
##   ..@ graphs      : list()
##   ..@ neighbors   : list()
##   ..@ reductions  : list()
##   ..@ images      : list()
##   ..@ project.name: chr "Adipose"
##   ..@ misc        : list()
##   ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
##   .. ..$ : int [1:3] 5 1 0
##   ..@ commands    : list()
##   ..@ tools       : list()
# calculate meitocondril genes porcentage for quality control and normalization

adp[["percent.mt"]] <- PercentageFeatureSet(adp, pattern = "^mt-")
# set colors
cnames <- setNames(rep(c("cyan3", "darkgoldenrod1"), each = 4), levels(factor(adp@meta.data$orig.ident)))
cnames
##              D10              D16              D20              D26 
##          "cyan3"          "cyan3"          "cyan3"          "cyan3" 
##              W10              W16              W20              W26 
## "darkgoldenrod1" "darkgoldenrod1" "darkgoldenrod1" "darkgoldenrod1"
# plot total counts per sample
VlnPlot(adp, features = "nCount_RNA", layer = "counts", group.by = "orig.ident", raster = FALSE, alpha = 0.2) +
  scale_fill_manual(values = cnames)

# or using ggplot2
adp@meta.data %>%
  ggplot(aes(color = orig.ident, x = nCount_RNA, fill = orig.ident)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  scale_x_log10() +
  geom_vline(xintercept = 650, color = "red", linetype = "dotted")

# determine number of features (genes)

VlnPlot(adp, features = "nFeature_RNA", group.by = "orig.ident") +
  scale_fill_manual(values = cnames)

# visualize % mitocondrial genes

VlnPlot(adp, features = "percent.mt", group.by = "orig.ident") +
  scale_fill_manual(values = cnames) +
  geom_hline(yintercept = 10, color = "red")

# add time point metadata day 0 or day 6
adp$time_point <- ifelse(stringr::str_detect(adp@meta.data$orig.ident, "0"),
  "Day 0", "Day 6"
)


adp$condition <- ifelse(stringr::str_detect(adp@meta.data$orig.ident, "^W"),
  "WT", "DKO"
)

# a condition and time point merged metadata
adp$condition_tp <- paste(adp$condition, adp$time_point)

# scatter the point by number of features and mitocndrial genes

FeatureScatter(adp, feature1 = "percent.mt", feature2 = "nFeature_RNA", group.by = "orig.ident", split.by = "time_point")

# and by number of fetures and total counts
FeatureScatter(adp, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident", split.by = "time_point", log = TRUE)

# filter
# Set one set of parameters for Day 0 samples;
# keep the rownames (Cell barcodes)
t0 <- adp@meta.data |>
  filter(
    time_point == "Day 0", nFeature_RNA > 350,
    nCount_RNA > 650, percent.mt < 10
  ) |>
  tibble::rownames_to_column("Cell") |>
  pull(Cell)

# Set an alternative set of thresholds for Day 6 samples;
# keep the rownames (Cell barcodes)
t6 <- adp@meta.data |>
  filter(
    time_point == "Day 6", nFeature_RNA > 350,
    nCount_RNA > 650, percent.mt < 25
  ) |>
  tibble::rownames_to_column("Cell") |>
  pull(Cell)

keep <- c(t0, t6)
# use different parameters; established above
adp_filt <- subset(adp, cells = keep)

# save file after filtering

saveRDS(adp_filt, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt.rds")
# run sctransform
adp_filt <- SCTransform(adp_filt, vars.to.regress = "percent.mt", verbose = FALSE)

# Check default assay
############ DefaultAssay(object = adp_filt)

# Set default assay
########### DefaultAssay(object = adp_filt) <- "RNA"


# run PCA

adp_filt <- RunPCA(adp_filt, verbose = FALSE, assay = "SCT")

# visualizethte first 9 PC


DimHeatmap(adp_filt, dims = 1:9, cells = 500, balanced = TRUE, ncol = 3)

# perform the elbow analisys
ElbowPlot(adp_filt, ndims = 40)

# performing the neighbors analisys to prepara for the clusterin
adp_filt <- FindNeighbors(adp_filt, dims = 1:30)

# calculate the clusterin (suggessted redolution 0.4-1.2)

adp_filt <- FindClusters(adp_filt, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 42114
## Number of edges: 1478117
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9569
## Number of communities: 8
## Elapsed time: 10 seconds
# UMAP for the viasualization of the clusters

adp_filt <- RunUMAP(adp_filt, dims = 1:30)
DimPlot(adp_filt,
  reduction = "pca", group.by = c("orig.ident", "seurat_clusters", "time_point", "condition", "condition_tp"),
  alpha = 0.2, ncol = 2
)

DimPlot(adp_filt,
  reduction = "umap", group.by = c("orig.ident", "seurat_clusters", "time_point", "condition", "condition_tp"),
  alpha = 0.2, ncol = 2
)

# save the adp filtered file

saveRDS(adp_filt, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt_sctran_clust0.1.rds")
# prepare the SCT data for the search of markers among clusters
adp_filt <- PrepSCTFindMarkers(adp_filt, verbose = T)


# For differential expression by finding markers (many method available, default a Wilcox rank sum. Look up test.use parameters for options)
# requires presto installation to speed up the next step
# devtools::install_github('immunogenomics/presto')
library(presto)
# find all markers
adp_filt_markers <- FindAllMarkers(adp_filt, only.pos = TRUE)
# ordering the results
adp_filt_markers <- adp_filt_markers %>%
  arrange(cluster, desc(avg_log2FC), desc(p_val_adj))

# examine a small subset
adp_filt_markers %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)
## # A tibble: 41 × 7
## # Groups:   cluster [8]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene     
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>    
##  1 2.41e- 49       4.48 0.01  0.001 4.61e- 45 0       Erv3     
##  2 0               4.27 0.163 0.011 0         0       Mlana    
##  3 1.67e-144       4.00 0.032 0.002 3.20e-140 0       Pdlim3   
##  4 0               3.88 0.11  0.008 0         0       Mamdc2   
##  5 9.70e-146       3.72 0.033 0.002 1.85e-141 0       Nat8f3   
##  6 0               3.61 0.261 0.042 0         1       Cxcl13   
##  7 4.17e- 75       3.39 0.016 0.001 7.96e- 71 1       Dlx3     
##  8 1.85e- 86       3.38 0.019 0.002 3.53e- 82 1       Hcn4     
##  9 0               3.29 0.389 0.049 0         1       Serpina3m
## 10 9.23e- 40       3.19 0.01  0.001 1.76e- 35 1       Cstdc4   
## # ℹ 31 more rows
# markers visualization
top20 <- adp_filt_markers %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 1) %>%
  slice_head(n = 20) %>%
  ungroup()
DoHeatmap(adp_filt, features = top20$gene) + NoLegend()

# associate pubblished markers with cell type
# contaminants
contam <- c("Ptprc", "Plp1", "Pecam1")
# beige adipocytes
beige <- c("Ucp1", "Ppargc1a", "Elovl3", "Cidea")
# preadipocytes
preadip <- c("Mmp3", "Cd142", "Itgb1")
# proliferating
prolif <- "Mki67"
# differentiating adipocytes
diffadip <- c("Col5a3", "Serpina3n")

VlnPlot(adp_filt, features = contam)

# visualize in the cluster the cel type corresponding to the  feature= parameter above (this case contam)
FeaturePlot(adp_filt, features = contam)

saveRDS(adp_filt_markers, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt_markers.rds")
DefaultAssay(adp_filt)
## [1] "SCT"
mem.maxVSize(vsize = 15000)
## [1] Inf
glimpse(adp_filt)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
##   ..@ assays      :List of 2
##   .. ..$ RNA:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
##   .. ..$ SCT:Formal class 'SCTAssay' [package "Seurat"] with 9 slots
##   ..@ meta.data   :'data.frame': 42114 obs. of  11 variables:
##   .. ..$ orig.ident     : chr [1:42114] "D10" "D10" "D10" "D10" ...
##   .. ..$ nCount_RNA     : num [1:42114] 11188 32832 28515 18954 27181 ...
##   .. ..$ nFeature_RNA   : int [1:42114] 3383 5823 5002 4305 4586 5015 3927 4006 5489 4093 ...
##   .. ..$ percent.mt     : num [1:42114] 3.16 1.8 1.39 1.89 1.33 ...
##   .. ..$ time_point     : chr [1:42114] "Day 0" "Day 0" "Day 0" "Day 0" ...
##   .. ..$ condition      : chr [1:42114] "DKO" "DKO" "DKO" "DKO" ...
##   .. ..$ condition_tp   : chr [1:42114] "DKO Day 0" "DKO Day 0" "DKO Day 0" "DKO Day 0" ...
##   .. ..$ nCount_SCT     : num [1:42114] 22992 25464 25285 23590 25170 ...
##   .. ..$ nFeature_SCT   : int [1:42114] 3790 5742 4974 4280 4552 4956 3904 4005 5363 4077 ...
##   .. ..$ SCT_snn_res.0.1: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
##   .. ..$ seurat_clusters: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
##   ..@ active.assay: chr "SCT"
##   ..@ active.ident: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
##   .. ..- attr(*, "names")= chr [1:42114] "D10_AAACCCAAGATGCTTC-1" "D10_AAACCCAAGCTCTTCC-1" "D10_AAACCCAAGTCATCGT-1" "D10_AAACCCAGTAGCGTCC-1" ...
##   ..@ graphs      :List of 2
##   .. ..$ SCT_nn :Formal class 'Graph' [package "SeuratObject"] with 7 slots
##   .. ..$ SCT_snn:Formal class 'Graph' [package "SeuratObject"] with 7 slots
##   ..@ neighbors   : list()
##   ..@ reductions  :List of 2
##   .. ..$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
##   .. ..$ umap:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
##   ..@ images      : list()
##   ..@ project.name: chr "Adipose"
##   ..@ misc        : list()
##   ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
##   .. ..$ : int [1:3] 5 1 0
##   ..@ commands    :List of 5
##   .. ..$ SCTransform.RNA      :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   .. ..$ RunPCA.SCT           :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   .. ..$ FindNeighbors.SCT.pca:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   .. ..$ FindClusters         :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   .. ..$ RunUMAP.SCT.pca      :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   ..@ tools       : list()
table(adp_filt$condition_tp)
## 
## DKO Day 0 DKO Day 6  WT Day 0  WT Day 6 
##     11655     10512     11807      8140
Idents(adp_filt) <- "SCT_snn_res.0.1"
table(Idents(adp_filt))
## 
##     0     1     2     3     4     5     6     7 
## 17062  7893  6691  6323  2251   979   557   358
######################################################################################
#         Createa new metadata with the expression of gene (example CD3D)           ##
#                                                                                   ##
## gene_name <- "CD3D"                                                              ##
#                                                                                   ##
#    Extract gene expression values from the SCT assay                              ##
## seurat_obj[[gene_name]] <- FetchData(seurat_obj, vars = gene_name, assay = "SCT")##
######################################################################################


# Viewing the code, JoinLayers was called to link all the data sets together. This is
# a new feature of Seurat5, and is required for analyzing data after integration and batch correction. In this case fuse all the samples (D10,D16 , etc.)

# scRNA distributions are usually no normal. For this reason Student T test is usually not applied as it assume normality.
#instead Wilcos rank sum is preferes.
#A continuation the GAPDH distribution is analized to show the non-normal shape of the distribution tipical in scRNA.
plot(density(sample(JoinLayers(adp_filt@assays$RNA)$count["Gapdh", ], 2500)), cex = 0, lty = 1, main = "Density of Gapdh in 2500 random cells")

hist(sample(JoinLayers(adp@assays$RNA)$count["Gapdh", ], 2500), breaks = 99, main = "Histogram of Gapdh in 2500 random cells", ylab = "Frequency", xlab = "Gene counts")

###############################################################################
##        how estract expression matrixes                                    ##
##  expr<-GetAssayData(adp_filt,assay = "SCT",slot = "data")                 ##
##  expr_df <- as.data.frame(as.matrix(expr)) %>%                            ##
##   rownames_to_column(var = "gene") %>%                                    ##
##   pivot_longer(-gene, names_to = "cell", values_to = "expression")        ##
###############################################################################


adp_pp <- adp_filt

Idents(adp_pp) <- "SCT"
adp_pp_mks <- PrepSCTFindMarkers(adp_pp)
Idents(adp_pp_mks) <- "time_point"
table(Idents(adp_pp_mks))
## 
## Day 0 Day 6 
## 23462 18652
## to find different expression between a label and the others in a metadata idents (if between two specific labels "idents.2=" can be used to specify.)
Day0_Day6_DE <- FindMarkers(adp_pp_mks, ident.1 = "Day 0", test.use = "wilcox", min.pct = 0.01, logfc.threshold = 0.1)
Idents(adp_pp_mks) <- "SCT_snn_res.0.1"
DefaultAssay(adp_pp_mks) <- "SCT"

## find all markers between clusters

de_allClusters <- FindAllMarkers(adp_pp_mks, test.use = "wilcox", min.pct = 0.1, only.pos = TRUE)

head(de_allClusters)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene
## Emp3       0   1.547753 0.882 0.276         0       0   Emp3
## Tagln      0   1.761800 0.869 0.285         0       0  Tagln
## Mfap5      0   1.983247 0.967 0.410         0       0  Mfap5
## Igfbp6     0   2.156417 0.820 0.269         0       0 Igfbp6
## Acta2      0   1.658171 0.808 0.282         0       0  Acta2
## Cd9        0   1.769068 0.771 0.250         0       0    Cd9
## to check the most positivelly different markers per cluster
top5PerCluster <- matrix(ncol = 7)
colnames(top5PerCluster) <- colnames(de_allClusters)
for (i in 0:7) {
  top5PerCluster <- rbind(top5PerCluster, head(de_allClusters[which(de_allClusters$cluster == i), ], 5))
}
top5PerCluster <- top5PerCluster[-1, ]
top5PerCluster
##          p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Emp3         0  1.5477527 0.882 0.276         0       0    Emp3
## Tagln        0  1.7617999 0.869 0.285         0       0   Tagln
## Mfap5        0  1.9832469 0.967 0.410         0       0   Mfap5
## Igfbp6       0  2.1564170 0.820 0.269         0       0  Igfbp6
## Acta2        0  1.6581714 0.808 0.282         0       0   Acta2
## A2m          0  2.1957915 0.645 0.228         0       1     A2m
## Folh1        0  2.7411588 0.476 0.078         0       1   Folh1
## Ifi207       0  1.8621548 0.760 0.366         0       1  Ifi207
## Scara5       0  3.1733128 0.433 0.052         0       1  Scara5
## Fzd1         0  1.6533596 0.613 0.244         0       1    Fzd1
## Mki67        0  4.9253267 0.935 0.071         0       2   Mki67
## Pclaf        0  4.7515750 0.940 0.078         0       2   Pclaf
## Birc5        0  4.6546210 0.931 0.078         0       2   Birc5
## Tpx2         0  4.6761468 0.900 0.080         0       2    Tpx2
## Top2a        0  4.6970243 0.942 0.125         0       2   Top2a
## Car3         0  0.4251338 0.921 0.188         0       3    Car3
## A2m1         0  1.5737209 0.905 0.200         0       3     A2m
## Cd361        0  1.1656463 0.958 0.255         0       3    Cd36
## Cdo1         0  1.2164844 0.916 0.240         0       3    Cdo1
## Pnpla2       0  1.3997130 0.936 0.293         0       3  Pnpla2
## Lipe1        0  4.1800830 0.947 0.158         0       4    Lipe
## Plin11       0  3.9131311 0.933 0.162         0       4   Plin1
## Slc36a21     0  4.4061239 0.839 0.071         0       4 Slc36a2
## Arxes12      0  3.8682761 0.886 0.129         0       4  Arxes1
## Pcx1         0  3.7818126 0.891 0.149         0       4     Pcx
## Pdgfb        0  6.1167040 0.192 0.005         0       5   Pdgfb
## Cdh5         0  7.9942350 0.176 0.007         0       5    Cdh5
## Upp1         0  6.7224151 0.171 0.003         0       5    Upp1
## Pecam1       0  8.3300366 0.171 0.003         0       5  Pecam1
## Cldn5        0  9.4795361 0.157 0.001         0       5   Cldn5
## Tyrobp       0 11.2962603 0.996 0.006         0       6  Tyrobp
## C1qc         0 11.4420963 0.996 0.007         0       6    C1qc
## Fcer1g       0 11.0250753 1.000 0.012         0       6  Fcer1g
## Trem2        0 10.4617098 0.993 0.006         0       6   Trem2
## Ctss         0 10.9513900 0.991 0.005         0       6    Ctss
## Lrg11        0  5.2286151 0.975 0.169         0       7    Lrg1
## Wfdc211      0  7.6023043 0.835 0.035         0       7  Wfdc21
## Plin41       0  3.7349377 0.897 0.115         0       7   Plin4
## Slc5a31      0  4.2728744 0.888 0.160         0       7  Slc5a3
## Ccl91        0  5.8271171 0.757 0.063         0       7    Ccl9
DoHeatmap(adp_pp_mks, features = top5PerCluster$gene, slot = "scale.data")

## visualization of specific features

fig1 <- DimPlot(adp_pp_mks, group.by = "time_point")
fig2 <- FeaturePlot(adp_pp_mks, features = "Acta2", order = T)
fig3 <- FeaturePlot(adp_pp_mks, features = "Cd36", order = T)

fig1 / (fig2 | fig3)

## is possible to join all cells as a seample and do a pseudobulk analisys

pseudo_adp <- AggregateExpression(adp_pp_mks, assays = "SCT", return.seurat = T, group.by = c("orig.ident", "time_point", "condition", "condition_tp"))

head(pseudo_adp@assays$SCT$counts)
## 6 x 8 sparse Matrix of class "dgCMatrix"
##         D10_Day 0_DKO_DKO Day 0 D16_Day 6_DKO_DKO Day 6 D20_Day 0_DKO_DKO Day 0
## Xkr4                        188                      30                     146
## Gm19938                     202                      66                     135
## Sox17                        52                       .                      30
## Mrpl15                     5878                    3159                    4103
## Lypla1                     1475                    1216                    1267
## Tcea1                      9416                    3132                    5955
##         D26_Day 6_DKO_DKO Day 6 W10_Day 0_WT_WT Day 0 W16_Day 6_WT_WT Day 6
## Xkr4                         26                   231                    31
## Gm19938                      61                   179                    51
## Sox17                         .                    60                     .
## Mrpl15                     3909                  5546                  2603
## Lypla1                     1598                  1738                  1100
## Tcea1                      3769                  9304                  2924
##         W20_Day 0_WT_WT Day 0 W26_Day 6_WT_WT Day 6
## Xkr4                      175                    32
## Gm19938                   115                    62
## Sox17                      25                     .
## Mrpl15                   4609                  3309
## Lypla1                   1272                  1161
## Tcea1                    6589                  3580
pseudo_adp@meta.data
##                                      orig.ident time_point condition
## D10_Day 0_DKO_DKO Day 0 D10_Day 0_DKO_DKO Day 0      Day 0       DKO
## D16_Day 6_DKO_DKO Day 6 D16_Day 6_DKO_DKO Day 6      Day 6       DKO
## D20_Day 0_DKO_DKO Day 0 D20_Day 0_DKO_DKO Day 0      Day 0       DKO
## D26_Day 6_DKO_DKO Day 6 D26_Day 6_DKO_DKO Day 6      Day 6       DKO
## W10_Day 0_WT_WT Day 0     W10_Day 0_WT_WT Day 0      Day 0        WT
## W16_Day 6_WT_WT Day 6     W16_Day 6_WT_WT Day 6      Day 6        WT
## W20_Day 0_WT_WT Day 0     W20_Day 0_WT_WT Day 0      Day 0        WT
## W26_Day 6_WT_WT Day 6     W26_Day 6_WT_WT Day 6      Day 6        WT
##                         condition_tp
## D10_Day 0_DKO_DKO Day 0    DKO Day 0
## D16_Day 6_DKO_DKO Day 6    DKO Day 6
## D20_Day 0_DKO_DKO Day 0    DKO Day 0
## D26_Day 6_DKO_DKO Day 6    DKO Day 6
## W10_Day 0_WT_WT Day 0       WT Day 0
## W16_Day 6_WT_WT Day 6       WT Day 6
## W20_Day 0_WT_WT Day 0       WT Day 0
## W26_Day 6_WT_WT Day 6       WT Day 6
glimpse(pseudo_adp)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
##   ..@ assays      :List of 1
##   .. ..$ SCT:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
##   ..@ meta.data   :'data.frame': 8 obs. of  4 variables:
##   .. ..$ orig.ident  : chr [1:8] "D10_Day 0_DKO_DKO Day 0" "D16_Day 6_DKO_DKO Day 6" "D20_Day 0_DKO_DKO Day 0" "D26_Day 6_DKO_DKO Day 6" ...
##   .. ..$ time_point  : chr [1:8] "Day 0" "Day 6" "Day 0" "Day 6" ...
##   .. ..$ condition   : chr [1:8] "DKO" "DKO" "DKO" "DKO" ...
##   .. ..$ condition_tp: chr [1:8] "DKO Day 0" "DKO Day 6" "DKO Day 0" "DKO Day 6" ...
##   ..@ active.assay: chr "SCT"
##   ..@ active.ident: Factor w/ 8 levels "D10_Day 0_DKO_DKO Day 0",..: 1 2 3 4 5 6 7 8
##   .. ..- attr(*, "names")= chr [1:8] "D10_Day 0_DKO_DKO Day 0" "D16_Day 6_DKO_DKO Day 6" "D20_Day 0_DKO_DKO Day 0" "D26_Day 6_DKO_DKO Day 6" ...
##   ..@ graphs      : list()
##   ..@ neighbors   : list()
##   ..@ reductions  : list()
##   ..@ images      : list()
##   ..@ project.name: chr "Aggregate"
##   ..@ misc        : list()
##   ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
##   .. ..$ : int [1:3] 5 1 0
##   ..@ commands    :List of 1
##   .. ..$ ScaleData.SCT:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
##   ..@ tools       : list()
# just to clean up the look a little bit
pseudo_adp <- RenameCells(pseudo_adp, new.names = gsub("_.*", "", pseudo_adp$orig.ident))
pseudo_adp$orig.ident <- gsub("_.*", "", pseudo_adp$orig.ident)
head(pseudo_adp@assays$SCT$counts)
## 6 x 8 sparse Matrix of class "dgCMatrix"
         D10  D16  D20  D26  W10  W16  W20  W26
Xkr4     188   30  146   26  231   31  175   32
Gm19938  202   66  135   61  179   51  115   62
Sox17     52    .   30    .   60    .   25    .
Mrpl15  5878 3159 4103 3909 5546 2603 4609 3309
Lypla1  1475 1216 1267 1598 1738 1100 1272 1161
Tcea1   9416 3132 5955 3769 9304 2924 6589 3580
pseudo_adp@meta.data
##     orig.ident time_point condition condition_tp
## D10        D10      Day 0       DKO    DKO Day 0
## D16        D16      Day 6       DKO    DKO Day 6
## D20        D20      Day 0       DKO    DKO Day 0
## D26        D26      Day 6       DKO    DKO Day 6
## W10        W10      Day 0        WT     WT Day 0
## W16        W16      Day 6        WT     WT Day 6
## W20        W20      Day 0        WT     WT Day 0
## W26        W26      Day 6        WT     WT Day 6
## performin bulk DE

Idents(pseudo_adp) <- "time_point"

## DESeq2 if not istalled:  BiocManager::install("DESeq2")
bulk_adp_de <- FindMarkers(pseudo_adp, ident.1 = "Day 0", ident.2 = "Day 6", test.use = "DESeq2")
head(bulk_adp_de)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj
## Nabp1      0  -1.960008     1     1         0
## Gpr176     0   1.624052     1     1         0
## Myl9       0   4.101615     1     1         0
## Pltp       0   1.256316     1     1         0
## Tm4sf1     0   3.128734     1     1         0
## Etv3       0  -1.190553     1     1         0
# comparing how many differentially expressed genes between SC and bulk analisys in function of the conditions

scDE.genes <- rownames(Day0_Day6_DE)[which(Day0_Day6_DE$p_val_adj < 0.05)]
bulkDE.genes <- rownames(bulk_adp_de)[which(bulk_adp_de$p_val_adj < 0.05)]
length(scDE.genes)
## [1] 10342
length(bulkDE.genes)
## [1] 8526
## heck the common features between sc and bulk
length(intersect(scDE.genes, bulkDE.genes))
## [1] 6619
head(intersect(scDE.genes, bulkDE.genes), 100)
##   [1] "Emp3"      "Tagln"     "Acta2"     "Tpm2"      "Cd36"      "A2m"      
##   [7] "Cd9"       "Rbp4"      "Mfap5"     "Igfbp6"    "Myl9"      "Anxa3"    
##  [13] "Timp2"     "Car3"      "Cyba"      "Cnn2"      "Thy1"      "Tpm1"     
##  [19] "Lbh"       "Actn1"     "Cdo1"      "Scd1"      "Ccn4"      "Rhoc"     
##  [25] "Pdlim2"    "Ccn5"      "Timp3"     "Ccn2"      "Selenom"   "Basp1"    
##  [31] "Clic1"     "Pnpla2"    "Phldb2"    "Col5a3"    "Phgdh"     "F3"       
##  [37] "Arl4a"     "mt-Nd6"    "Thbs1"     "Crip1"     "Arxes2"    "Mfap4"    
##  [43] "Lrrc32"    "Map1b"     "Ddah2"     "Cnn3"      "Fhl2"      "Ahnak2"   
##  [49] "Prelp"     "Npdc1"     "Rbp1"      "Mmp23"     "Vat1"      "Cyb5r1"   
##  [55] "Tuba1a"    "Nenf"      "Serpinf1"  "Bst2"      "Hes1"      "Arid5b"   
##  [61] "Atf5"      "Myof"      "Lgalsl"    "Fabp4"     "Adamts5"   "Tbcb"     
##  [67] "Ctsk"      "Flna"      "Prg4"      "Btg1"      "Adm"       "Acsl1"    
##  [73] "Eva1b"     "Asns"      "S100a4"    "Abracl"    "Emp2"      "Ywhah"    
##  [79] "Plk2"      "Plec"      "Mxra7"     "Tm4sf1"    "Metrnl"    "Tgfb1i1"  
##  [85] "Nupr1"     "Glul"      "Msn"       "Vps29"     "Ccl7"      "Axl"      
##  [91] "Sri"       "Pls3"      "Cygb"      "Gpx1"      "Ifi207"    "Gpx8"     
##  [97] "Prdx4"     "Serpina3n" "Ttc28"     "Adipoq"
## to chech spefic features

bulk_adp_de[c("Acta2", "Cd36"), ]
##              p_val avg_log2FC pct.1 pct.2    p_val_adj
## Acta2 0.000000e+00   5.482773     1     1 0.000000e+00
## Cd36  2.413872e-93  -4.063080     1     1 4.611462e-89
## visualize the DE genes

Idents(adp_pp) <- "SCT_snn_res.0.1"
DotPlot(adp_pp, features = unique(top5PerCluster$gene), dot.scale = 3) + coord_flip()

# violine plot as alternative visualization

Idents(adp_pp) <- "time_point"
VlnPlot(adp_pp, features = c("Acta2", "Cd36"), alpha = 0.1)

## anotate the differential genes

markers <- c("Mmp3", "Mki67", "Fabp4", "Scd1", "Ucp1", "Ppargc1a", "Elovl3", "Cidea")
Idents(adp_pp) <- "SCT_snn_res.0.1"
avgExp <- AverageExpression(adp_pp, markers, assay = "SCT")$SCT
avgExp
## 8 x 8 sparse Matrix of class "dgCMatrix"
##                    g0          g1           g2           g3          g4
## Mmp3     1.008188e+01  2.41986570  2.056792707   0.63925352   0.1550422
## Mki67    6.781151e-02  0.09261371  5.246151547   0.38178080   0.1257219
## Fabp4    3.001700e+00 52.90814646 24.516514721 159.78317254 452.0910706
## Scd1     6.420701e-01  7.15912834  2.371244956  26.48774316  80.5135495
## Ucp1     .             0.00836184  0.003138544   0.01534082   0.1332741
## Ppargc1a 3.868245e-03  0.04003547  0.012703632   0.10975803   0.8187472
## Elovl3   6.447075e-04  0.06461422  0.028097444   0.17744741   1.3083074
## Cidea    1.406635e-03  0.04408970  0.013301450   0.13000158   0.7250111
##                    g5          g6           g7
## Mmp3      2.413687436 1.448833034   1.16201117
## Mki67     0.373850868 1.935368043   0.24022346
## Fabp4    14.020429009 2.181328546 132.08379888
## Scd1      1.168539326 0.210053860  68.46648045
## Ucp1      .           .             .         
## Ppargc1a  0.008171604 .             0.05027933
## Elovl3    0.007150153 .             .         
## Cidea     0.008171604 0.001795332   .
DimPlot(adp_pp, label = T)

FeaturePlot(adp_pp, features = markers, ncol = 3, order = T)

## after finding the markers associated with a particular cluster we can annotate for the cell type.

adipocyte <- vector(length = ncol(adp_pp))
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(0, 5))] <- "Preadipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(2, 6))] <- "Proliferating cells"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(1, 3))] <- "Differentiating beige adipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(4))] <- "Differentiated beige adipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(7))] <- "Unclassified"
adp_pp$adipocyte <- adipocyte

f1 <- DimPlot(adp_pp, group.by = "SCT_snn_res.0.1", label = T) + NoLegend()
f2 <- DimPlot(adp_pp, group.by = "time_point") + NoLegend()
f3 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
(f1 / f2 | f3)

## SingleR annotation. This method annotate each cell in the dataset against a reference dataset.
library(SingleR) # for cell type annotation; Bioconductor
library(celldex) # for cell type annotation reference; Bioconductor
library(MAST) # for differential expression; Bioconductor

adp.sce <- as.SingleCellExperiment(adp_pp, assay = "SCT") # This selects *only* the SCT assay
mouseRNASeq <- celldex::MouseRNAseqData()
head(mouseRNASeq)
## class: SummarizedExperiment 
## dim: 6 358 
## metadata(0):
## assays(1): logcounts
## rownames(6): Xkr4 Rp1 ... Lypla1 Tcea1
## rowData names(0):
## colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
##   SRR1044044Aligned
## colData names(3): label.main label.fine label.ont
table(mouseRNASeq$label.main)
## 
##        Adipocytes        Astrocytes           B cells    Cardiomyocytes 
##                13                27                 5                 8 
##   Dendritic cells Endothelial cells  Epithelial cells      Erythrocytes 
##                 2                12                 2                 3 
##       Fibroblasts      Granulocytes       Hepatocytes       Macrophages 
##                45                15                 4                32 
##         Microglia         Monocytes           Neurons          NK cells 
##                72                 6                64                18 
##  Oligodendrocytes           T cells 
##                12                18
table(mouseRNASeq$label.fine)
## 
##            Adipocytes                 aNSCs            Astrocytes 
##                    13                     8                    24 
##  Astrocytes activated               B cells        Cardiomyocytes 
##                     3                     5                     8 
##       Dendritic cells     Endothelial cells             Ependymal 
##                     2                    12                     2 
##          Erythrocytes           Fibroblasts Fibroblasts activated 
##                     3                    27                     9 
## Fibroblasts senescent          Granulocytes           Hepatocytes 
##                     9                    15                     4 
##           Macrophages Macrophages activated             Microglia 
##                    26                     6                    56 
##   Microglia activated             Monocytes               Neurons 
##                    16                     6                    39 
##     Neurons activated              NK cells                  NPCs 
##                     4                    18                    11 
##      Oligodendrocytes                  OPCs                 qNSCs 
##                    10                     2                     2 
##               T cells 
##                    18
annot <- SingleR::SingleR(test = adp.sce, ref = mouseRNASeq, labels = mouseRNASeq$label.main)
head(annot)
## DataFrame with 6 rows and 4 columns
##                                                scores      labels delta.next
##                                              <matrix> <character>  <numeric>
## D10_AAACCCAAGATGCTTC-1 0.523246:0.275495:0.210562:...  Adipocytes  0.2110472
## D10_AAACCCAAGCTCTTCC-1 0.499957:0.250442:0.226092:... Fibroblasts  0.1624889
## D10_AAACCCAAGTCATCGT-1 0.474176:0.213152:0.189308:... Fibroblasts  0.3422910
## D10_AAACCCAGTAGCGTCC-1 0.449344:0.246099:0.227259:... Fibroblasts  0.0504144
## D10_AAACCCAGTGCACGCT-1 0.399770:0.179617:0.207093:... Fibroblasts  0.0520780
## D10_AAACCCATCCACTGAA-1 0.498567:0.249653:0.189627:... Fibroblasts  0.0797964
##                        pruned.labels
##                          <character>
## D10_AAACCCAAGATGCTTC-1    Adipocytes
## D10_AAACCCAAGCTCTTCC-1   Fibroblasts
## D10_AAACCCAAGTCATCGT-1   Fibroblasts
## D10_AAACCCAGTAGCGTCC-1   Fibroblasts
## D10_AAACCCAGTGCACGCT-1   Fibroblasts
## D10_AAACCCATCCACTGAA-1   Fibroblasts
# if a label is to weak during SingleR the cell is taged as NA. Now we add the labels determine to the metadata of the Seurat object

table(annot$pruned.labels, useNA = "ifany") # useNA can be used turned on in the `table` function
## 
##        Adipocytes Endothelial cells       Fibroblasts       Macrophages 
##             13406               103             27696               478 
##              <NA> 
##               431
adp_pp$mouseRNASeq.main <- annot$pruned.labels

## let's visualize the final result

annotFig1 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
annotFig2 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main", label = T)

annotFig1 | annotFig2

## annotation by cluster

Idents(adp_pp) <- "SCT_snn_res.0.1" # Assign clusters as the identities
avgExp <- AverageExpression(adp_pp, assays = "SCT")$SCT # Run AverageExpression on the SCT assay and return only SCT
clustAnnot <- SingleR::SingleR(test = avgExp, ref = mouseRNASeq, labels = mouseRNASeq$label.main) # Run SingleR on the averaged expression matrix
clustAnnot
## DataFrame with 8 rows and 4 columns
##                            scores      labels delta.next pruned.labels
##                          <matrix> <character>  <numeric>   <character>
## g0 0.764947:0.405104:0.313582:... Fibroblasts  0.0900693   Fibroblasts
## g1 0.811652:0.425415:0.305177:... Fibroblasts  0.1090396   Fibroblasts
## g2 0.762817:0.391869:0.314699:... Fibroblasts  0.0699542   Fibroblasts
## g3 0.822446:0.419011:0.303251:...  Adipocytes  0.0520561    Adipocytes
## g4 0.837119:0.421358:0.311282:...  Adipocytes  0.1554873    Adipocytes
## g5 0.730079:0.366069:0.360429:... Fibroblasts  0.0609226   Fibroblasts
## g6 0.570732:0.211146:0.541797:... Macrophages  0.1006733   Macrophages
## g7 0.828817:0.388983:0.313862:...  Adipocytes  0.0646115    Adipocytes
clustLabels <- as.vector(clustAnnot$pruned.labels) # retrieve only the cluster-derived annotations
names(clustLabels) <- c(0:7) # assign the cluster numbers as the annotations
clustLabels.vect <- clustLabels[match(adp_pp$SCT_snn_res.0.1, names(clustLabels))] # match the cluster identities per cell in the Seurat data to the cluster labels
names(clustLabels.vect) <- colnames(adp_pp) # ensure that the cluster identities are assigned the cell names
adp_pp$mouseRNASeq.main.clust <- clustLabels.vect # add the cluster annotations to the vector

clustAnnotFig1 <- DimPlot(adp_pp, group.by = "SCT_snn_res.0.1", label = T) + NoLegend()
clustAnnotFig2 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
clustAnnotFig3 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main")
clustAnnotFig4 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main.clust")

(clustAnnotFig1 | clustAnnotFig2) / (clustAnnotFig3 | clustAnnotFig4)